Read in the data
#normal data
ed <- fread(here::here("data/education/combined_education.csv")) %>%
mutate(year = factor(year))
ed$year <- revalue(ed$year,c("1415" = "2014-2015", "1516" = "2015-2016", "1617" = "2016-2017",
"1718" = "2017-2018", "1819" = "2018-2019"))
ed <- ed %>% filter(District.Name == "South Wasco County SD 1" |
District.Name == "Jefferson County SD 509J" |
District.Name == "North Wasco County SD 21" |
District.Name == "Sherman County SD" |
District.Name == "Dufur SD 29" |
District.Name == "Hood River County SD") %>%
select(-c("District.ID" ,"Student.Group","Fall.Membership", "Free.Reduced.Priced.Lunch","Percent.Regular.Attenders" ))
ed.increase <- ed %>% select("year" ,"District.Name","On.Time.Grad.Rate", "Teacher.Experience.Pct","Percent.ELA.Proficient.Change")
ed.decrease <- ed %>% select("year" ,"District.Name","Percent.Economically.Disadvantaged", "Percent.Chronically.Absent","Dropout.Rate")
### increaseing variables
ed.melt.increase = melt(ed.increase, id.vars = c("year", "District.Name"),
measure.vars = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
"Percent.ELA.Proficient.Change")) %>%
mutate(variable = factor(variable, levels = c("On.Time.Grad.Rate", "Teacher.Experience.Pct",
"Percent.ELA.Proficient.Change"))) %>%
mutate(variable = recode(variable, "On.Time.Grad.Rate" = "On Time Graduation",
"Teacher.Experience.Pct" = "Teacher Experience",
"Percent.ELA.Proficient.Change" = "ELA Proficiency Change"))
ed.melt.decrease = melt(ed.decrease, id.vars = c("year", "District.Name"),
measure.vars = c("Percent.Economically.Disadvantaged", "Percent.Chronically.Absent",
"Dropout.Rate")) %>%
mutate(variable = recode(variable, "Percent.Economically.Disadvantaged" = "Economic Disadvantage",
"Percent.Chronically.Absent" = "Chronic Absenteeism",
"Dropout.Rate"="Dropout Rate"))
benefits <- ggplot(ed.melt.increase, aes(y = District.Name, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,0)), color = "black", size = 3.5) +
coord_equal() +
#facet_grid(rows = vars(variable)) +
facet_wrap(~variable, ncol=1) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0, breaks= c(-75, -50, -25, 0, 25, 50, 75, 100), limits = c(-75, 100)) +
theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust=1),
strip.background = element_rect(
color="black", fill="#ADB5BD", size=1, linetype="solid"),
strip.text.y = element_text(size = 5, color = "black", face = "bold")) +
labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent")
benefits
## Warning: Removed 12 rows containing missing values (geom_text).
barriers<- ggplot(ed.melt.decrease, aes(y = District.Name, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,0)), color = "black", size = 3.5) +
coord_equal() +
#facet_grid(rows = vars(variable)) +
facet_wrap(~variable, ncol=1) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_sequential(palette = "Purples 3") +
theme(axis.text.x = element_text(angle = 35, vjust = 1, hjust=1),
strip.background = element_rect(
color="black", fill="#ADB5BD", size=1, linetype="solid"),
strip.text.y = element_text(size = 4, color = "black", face = "bold")) +
labs(title = "Barriers to Student Success", x ="School Year", y = "", fill="Percent")
barriers
plot_grid(
benefits, barriers,
labels = "AUTO", ncol = 2
)
## Warning: Removed 12 rows containing missing values (geom_text).
prepare table to make a heatmap
## for all the education data with original values
ed.melt = melt(ed, id.vars = c("year", "District.Name"),
measure.vars = c("On.Time.Grad.Rate", "Dropout.Rate" ,
"Percent.ELA.Proficient.Change", "Teacher.Experience.Pct", "Percent.Chronically.Absent",
"Percent.Economically.Disadvantaged"))
for south wasco alone:
sw <- filter(ed.melt, District.Name == "South Wasco County SD 1")
ggplot(sw, aes(y = variable, x = factor(year), fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
scale_fill_viridis() +
coord_equal()
Facet wrap to show all schools
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
scale_fill_viridis(option = "D") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
Trying with rwb divergent palette: not working - Continuous value supplied to discrete scale.
continuous scale palettes: https://biostats.w.uib.no/color-scale-for-continuous-variables/ controlling midpoint and range for color palettes: https://stackoverflow.com/questions/58718527/setting-midpoint-for-continuous-diverging-color-scale-on-a-heatmap
# interval <- c(-60, -20, -10, -5, seq(0,100,5))
# data.values <- as.vector(na.omit(ed.melt$value))
# color_rwb <- cut(data.values, breaks=interval, labels = rwb(24))
pgcol <- brewer.pal(9, "PRGn")
pgpal <- colorRampPalette(pgcol)
ggplot(ed.melt, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0)
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
xlab("School Year")
ggplot(ed.melt.increase, aes(y = variable, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent")
## Warning: Removed 12 rows containing missing values (geom_text).
ggplotly(ggplot(ed.melt.decrease, aes(y = variable, x = year, fill = value,
text = paste0("School District: ", District.Name,
"<br>Year: ", year,
"<br>Percent ", variable, ": ", value, "%"))) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() + facet_wrap(~District.Name) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_sequential(palette = "Purples 3") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
labs(title = "Barriers to Student Success",
x ="School Year", y = "", fill="Percent"),
tooltip = "text")%>%
config(displayModeBar = "static", displaylogo = FALSE,
modeBarButtonsToRemove=list("zoom2d","select2d","lasso2d","hoverClosestCartesian",
"hoverCompareCartesian","resetScale2d"))
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
ed.melt.increase2 <- ed.melt.increase %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.increase2, aes(y = indicator, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
## banners
geom_hline(yintercept = 6.5, color = "white", lwd = 5) +
geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
geom_hline(yintercept = 18.5, color = "white", lwd = 5) +
# Title of variables
annotate("text", x=2, y=6.5,label="ELA Proficiency Change", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=12.5,label="On Time Graduation", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=18.5,label="Teacher Experience", fontface=2, size = 3, color="black") +
#scale_y_discrete(limit = rep(c("South Wasco County SD 1", "Sherman County SD" , "North Wasco County SD 21",
#"Jefferson County SD 509J", "Hood River County SD","Dufur SD 29"), 3)) +
xlab("School Year")
## Warning: Removed 12 rows containing missing values (geom_text).
Try making heatmaps with cowplot each variable is its own heat map with all the schools.
ed.grad <- filter(ed.melt.increase, variable == "On Time Graduation")
ed.teach <- filter(ed.melt.increase, variable == "Teacher Experience")
ed.ela <- filter(ed.melt.increase, variable == "ELA Proficiency Change")
grad <- ggplot(ed.grad, aes(y = District.Name, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank())
teach <- ggplot(ed.teach, aes(y = District.Name, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.title.x=element_blank(),
axis.text.x=element_blank())
ela <- ggplot(ed.ela, aes(y = District.Name, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
plot_grid(
grad, teach, ela,
labels = c("On Time Graduation", "Teacher Experience", "ELA Proficiency Change"), ncol = 1
)
## Warning: Removed 12 rows containing missing values (geom_text).
plotly version
ggplotly(ggplot(ed.melt.increase, aes(y = District.Name, x = year, fill = value,
text = paste0("School District: ", District.Name,
"<br>Year: ", year,
"<br>Percent ", variable, ": ", value, "%"))) +
geom_tile(color = "#ADB5BD") + #gray
#geom_text(aes(label = round(value,0)), color = "black") +
coord_equal() +
#facet_grid(rows = vars(variable)) +
facet_wrap(~variable, ncol=1) +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_divergingx(palette = "PRGn", mid = 0) +
theme(axis.text.x = element_text(angle = 20, vjust = 0.5, hjust=1)) +
labs(title = "Benefits to Student Success", x ="School Year", y = "", fill="Percent"), tooltip="text")
ed.melt.decrease2 <- ed.melt.decrease %>% mutate(indicator = paste(variable, District.Name, sep= "-"))
ggplot(ed.melt.decrease2, aes(y = indicator, x = year, fill = value)) +
geom_tile(color = "#ADB5BD") + #gray
geom_text(aes(label = round(value,1)), color = "black") +
coord_equal() +
#scale_fill_gradientn(colors = pgcol, values = c(-60, 0, 100)) +
scale_fill_continuous_sequential(palette = "Purples 3") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
## banners
geom_hline(yintercept = 6.5, color = "white", lwd = 5) +
geom_hline(yintercept = 12.5, color = "white", lwd = 5) +
geom_hline(yintercept = 18.5, color = "white", lwd = 5) +
# Title of variables
annotate("text", x=2, y=6.5,label="Chronic Absenteeism", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=12.5,label="Dropout", fontface=2, size = 3, color="black") +
annotate("text", x=2, y=18.5,label="Economically Disadvantaged", fontface=2, size = 3, color="black") +
xlab("School Year")